!pip install pandas
!pip install plotly
!pip install dash
!pip install dash_bootstrap_components
Requirement already satisfied: pandas in c:\users\omate\anaconda3\lib\site-packages (1.1.3) Requirement already satisfied: pytz>=2017.2 in c:\users\omate\anaconda3\lib\site-packages (from pandas) (2020.1) Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\omate\anaconda3\lib\site-packages (from pandas) (2.8.1) Requirement already satisfied: numpy>=1.15.4 in c:\users\omate\anaconda3\lib\site-packages (from pandas) (1.19.2) Requirement already satisfied: six>=1.5 in c:\users\omate\anaconda3\lib\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0) Requirement already satisfied: plotly in c:\users\omate\anaconda3\lib\site-packages (5.8.0) Requirement already satisfied: tenacity>=6.2.0 in c:\users\omate\anaconda3\lib\site-packages (from plotly) (8.0.1) Requirement already satisfied: dash in c:\users\omate\anaconda3\lib\site-packages (2.5.0) Requirement already satisfied: Flask>=1.0.4 in c:\users\omate\anaconda3\lib\site-packages (from dash) (1.1.2) Requirement already satisfied: flask-compress in c:\users\omate\anaconda3\lib\site-packages (from dash) (1.12) Requirement already satisfied: dash-html-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (2.0.0) Requirement already satisfied: plotly>=5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (5.8.0) Requirement already satisfied: dash-table==5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (5.0.0) Requirement already satisfied: dash-core-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash) (2.0.0) Requirement already satisfied: Werkzeug>=0.15 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (1.0.1) Requirement already satisfied: itsdangerous>=0.24 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (1.1.0) Requirement already satisfied: click>=5.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (7.1.2) Requirement already satisfied: Jinja2>=2.10.1 in 
c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash) (2.11.2) Requirement already satisfied: brotli in c:\users\omate\anaconda3\lib\site-packages (from flask-compress->dash) (1.0.9) Requirement already satisfied: tenacity>=6.2.0 in c:\users\omate\anaconda3\lib\site-packages (from plotly>=5.0.0->dash) (8.0.1) Requirement already satisfied: MarkupSafe>=0.23 in c:\users\omate\anaconda3\lib\site-packages (from Jinja2>=2.10.1->Flask>=1.0.4->dash) (1.1.1) Requirement already satisfied: dash_bootstrap_components in c:\users\omate\anaconda3\lib\site-packages (1.1.0) Requirement already satisfied: dash>=2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash_bootstrap_components) (2.5.0) Requirement already satisfied: dash-core-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (2.0.0) Requirement already satisfied: dash-table==5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (5.0.0) Requirement already satisfied: flask-compress in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (1.12) Requirement already satisfied: Flask>=1.0.4 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (1.1.2) Requirement already satisfied: plotly>=5.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (5.8.0) Requirement already satisfied: dash-html-components==2.0.0 in c:\users\omate\anaconda3\lib\site-packages (from dash>=2.0.0->dash_bootstrap_components) (2.0.0) Requirement already satisfied: brotli in c:\users\omate\anaconda3\lib\site-packages (from flask-compress->dash>=2.0.0->dash_bootstrap_components) (1.0.9) Requirement already satisfied: Jinja2>=2.10.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (2.11.2) Requirement already satisfied: itsdangerous>=0.24 in 
c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (1.1.0) Requirement already satisfied: Werkzeug>=0.15 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (1.0.1) Requirement already satisfied: click>=5.1 in c:\users\omate\anaconda3\lib\site-packages (from Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (7.1.2) Requirement already satisfied: tenacity>=6.2.0 in c:\users\omate\anaconda3\lib\site-packages (from plotly>=5.0.0->dash>=2.0.0->dash_bootstrap_components) (8.0.1) Requirement already satisfied: MarkupSafe>=0.23 in c:\users\omate\anaconda3\lib\site-packages (from Jinja2>=2.10.1->Flask>=1.0.4->dash>=2.0.0->dash_bootstrap_components) (1.1.1)
import pandas as pd

# Absolute path of the monthly rainfall CSV on the author's machine.
rainfall_csv_path = "C:\\Users\\omate\\Downloads\\india___monthly_rainfall_data___1901_to_2002 (1).csv"

# Load the whole file into the notebook-wide frame `df` and preview it.
df = pd.read_csv(rainfall_csv_path)
df.head()
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Andaman & Nicobar Islands | Andaman | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Andaman & Nicobar IslandsAndaman |
| 1 | Andaman & Nicobar Islands | Nicobar | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Andaman & Nicobar IslandsNicobar |
| 2 | Andhra Pradesh | Adilabad | 1901.0 | 6.725 | 10.488 | 23.288 | 35.560 | 23.119 | 115.546 | 294.119 | 276.865 | 181.615 | 47.310 | 1.339 | 0.000 | Andhra PradeshAdilabad |
| 3 | Andhra Pradesh | Adilabad | 1902.0 | 0.420 | 0.000 | 0.388 | 6.070 | 3.331 | 45.960 | 233.973 | 167.971 | 198.177 | 26.447 | 35.083 | 11.222 | Andhra PradeshAdilabad |
| 4 | Andhra Pradesh | Adilabad | 1903.0 | 6.643 | 1.956 | 0.173 | 4.551 | 33.348 | 132.078 | 436.611 | 334.544 | 226.037 | 138.818 | 14.095 | 8.823 | Andhra PradeshAdilabad |
# Discard the first two records by position; the head() preview shows that
# rows 0 and 1 hold no year or monthly values.
leading_rows_to_skip = 2
df = df.iloc[leading_rows_to_skip:]
df
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Andhra Pradesh | Adilabad | 1901.0 | 6.725 | 10.488 | 23.288 | 35.560 | 23.119 | 115.546 | 294.119 | 276.865 | 181.615 | 47.310 | 1.339 | 0.000 | Andhra PradeshAdilabad |
| 3 | Andhra Pradesh | Adilabad | 1902.0 | 0.420 | 0.000 | 0.388 | 6.070 | 3.331 | 45.960 | 233.973 | 167.971 | 198.177 | 26.447 | 35.083 | 11.222 | Andhra PradeshAdilabad |
| 4 | Andhra Pradesh | Adilabad | 1903.0 | 6.643 | 1.956 | 0.173 | 4.551 | 33.348 | 132.078 | 436.611 | 334.544 | 226.037 | 138.818 | 14.095 | 8.823 | Andhra PradeshAdilabad |
| 5 | Andhra Pradesh | Adilabad | 1904.0 | 0.054 | 0.121 | 11.446 | 0.017 | 16.900 | 131.048 | 160.694 | 81.865 | 251.577 | 110.391 | 0.146 | 0.130 | Andhra PradeshAdilabad |
| 6 | Andhra Pradesh | Adilabad | 1905.0 | 0.589 | 2.293 | 8.252 | 35.020 | 17.569 | 79.937 | 96.331 | 313.522 | 361.697 | 4.950 | 0.146 | 0.000 | Andhra PradeshAdilabad |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55314 | West Bengal | North Dinajpur | 2000.0 | 11.294 | 10.908 | 10.686 | 27.669 | 110.618 | 264.845 | 185.798 | 297.535 | 280.883 | 22.048 | 0.568 | 0.423 | West BengalNorth Dinajpur |
| 55315 | West Bengal | North Dinajpur | 2001.0 | 1.866 | 4.048 | 21.805 | 36.436 | 152.242 | 164.361 | 311.196 | 271.373 | 165.015 | 124.258 | 2.798 | 0.000 | West BengalNorth Dinajpur |
| 55316 | West Bengal | North Dinajpur | 2002.0 | 14.939 | 3.758 | 12.410 | 54.591 | 80.993 | 189.604 | 276.109 | 285.924 | 215.591 | 108.733 | 17.757 | 0.000 | West BengalNorth Dinajpur |
| 55317 | Lakshadweep | Lakshadweep | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | LakshadweepLakshadweep |
| 55318 | Goa | Goa | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GoaGoa |
55317 rows × 16 columns
# Count the missing entries in every column before any cleaning is applied.
print("Null values before processing:")
missing_before = df.isna().sum()
missing_before
Null values before processing:
State 0 District 0 Year 33 Jan 33 Feb 33 Mar 33 Apr 33 May 33 Jun 33 Jul 33 Aug 33 Sep 33 Oct 33 Nov 33 Dec 33 vlookup 102 dtype: int64
# Show every record whose Year value is missing.
df.loc[df['Year'].isna()]
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11222 | Daman & Diu | Daman | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Daman & DiuDaman |
| 11223 | Daman & Diu | Diu | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Daman & DiuDiu |
| 12142 | Gujarat | Jamnagar | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GujaratJamnagar |
| 12143 | Gujarat | Junagadh | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GujaratJunagadh |
| 12552 | Gujarat | Navsari | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GujaratNavsari |
| 12757 | Gujarat | Porbandar | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GujaratPorbandar |
| 13370 | Gujarat | Valsad | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GujaratValsad |
| 20715 | Karnataka | Dakshina Kannada | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KarnatakaDakshina Kannada |
| 22144 | Karnataka | Udupi | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KarnatakaUdupi |
| 22145 | Karnataka | Uttara Kannada | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KarnatakaUttara Kannada |
| 22146 | Kerala | Alappuzha | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaAlappuzha |
| 22147 | Kerala | Ernakulam | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaErnakulam |
| 22250 | Kerala | Kannur | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaKannur |
| 22251 | Kerala | Kasaragod | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaKasaragod |
| 22252 | Kerala | Kollam | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaKollam |
| 22253 | Kerala | Kottayam | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaKottayam |
| 22254 | Kerala | Kozhikode | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaKozhikode |
| 22255 | Kerala | Malappuram | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaMalappuram |
| 22256 | Kerala | Palakkad | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaPalakkad |
| 22257 | Kerala | Pathanamthitta | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaPathanamthitta |
| 22258 | Kerala | Thiruvananthapuram | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaThiruvananthapuram |
| 22259 | Kerala | Thrissur | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaThrissur |
| 22260 | Kerala | Wayanad | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | KeralaWayanad |
| 28483 | Maharashtra | Mumbai | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MaharashtraMumbai |
| 28484 | Maharashtra | Mumbai (Suburban) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MaharashtraMumbai (Suburban) |
| 29199 | Maharashtra | Raigarh | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MaharashtraRaigarh |
| 29200 | Maharashtra | Ratnagiri | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MaharashtraRatnagiri |
| 29405 | Maharashtra | Sindhudurg | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MaharashtraSindhudurg |
| 29508 | Maharashtra | Thane | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MaharashtraThane |
| 35935 | Pondicherry | Mahe | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | PondicherryMahe |
| 42464 | Tamil Nadu | Kanniyakumari | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Tamil NaduKanniyakumari |
| 55317 | Lakshadweep | Lakshadweep | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | LakshadweepLakshadweep |
| 55318 | Goa | Goa | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | GoaGoa |
# Number of records whose Year value is missing.
int(df['Year'].isna().sum())
33
# Keep only the records that have a Year value.
# The explicit .copy() makes `df` an independent frame, so the column
# assignments performed in later cells no longer raise
# SettingWithCopyWarning.
df = df[df['Year'].notna()].copy()
df
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Andhra Pradesh | Adilabad | 1901.0 | 6.725 | 10.488 | 23.288 | 35.560 | 23.119 | 115.546 | 294.119 | 276.865 | 181.615 | 47.310 | 1.339 | 0.000 | Andhra PradeshAdilabad |
| 3 | Andhra Pradesh | Adilabad | 1902.0 | 0.420 | 0.000 | 0.388 | 6.070 | 3.331 | 45.960 | 233.973 | 167.971 | 198.177 | 26.447 | 35.083 | 11.222 | Andhra PradeshAdilabad |
| 4 | Andhra Pradesh | Adilabad | 1903.0 | 6.643 | 1.956 | 0.173 | 4.551 | 33.348 | 132.078 | 436.611 | 334.544 | 226.037 | 138.818 | 14.095 | 8.823 | Andhra PradeshAdilabad |
| 5 | Andhra Pradesh | Adilabad | 1904.0 | 0.054 | 0.121 | 11.446 | 0.017 | 16.900 | 131.048 | 160.694 | 81.865 | 251.577 | 110.391 | 0.146 | 0.130 | Andhra PradeshAdilabad |
| 6 | Andhra Pradesh | Adilabad | 1905.0 | 0.589 | 2.293 | 8.252 | 35.020 | 17.569 | 79.937 | 96.331 | 313.522 | 361.697 | 4.950 | 0.146 | 0.000 | Andhra PradeshAdilabad |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55312 | West Bengal | North Dinajpur | 1998.0 | 3.676 | 16.042 | 15.435 | 43.497 | 189.031 | 101.695 | 275.914 | 316.537 | 262.286 | 256.652 | 15.486 | 0.000 | West BengalNorth Dinajpur |
| 55313 | West Bengal | North Dinajpur | 1999.0 | 7.867 | 2.932 | 2.066 | 3.020 | 151.115 | 214.111 | 380.077 | 375.183 | 232.015 | 85.839 | 7.591 | 0.255 | West BengalNorth Dinajpur |
| 55314 | West Bengal | North Dinajpur | 2000.0 | 11.294 | 10.908 | 10.686 | 27.669 | 110.618 | 264.845 | 185.798 | 297.535 | 280.883 | 22.048 | 0.568 | 0.423 | West BengalNorth Dinajpur |
| 55315 | West Bengal | North Dinajpur | 2001.0 | 1.866 | 4.048 | 21.805 | 36.436 | 152.242 | 164.361 | 311.196 | 271.373 | 165.015 | 124.258 | 2.798 | 0.000 | West BengalNorth Dinajpur |
| 55316 | West Bengal | North Dinajpur | 2002.0 | 14.939 | 3.758 | 12.410 | 54.591 | 80.993 | 189.604 | 276.109 | 285.924 | 215.591 | 108.733 | 17.757 | 0.000 | West BengalNorth Dinajpur |
55284 rows × 16 columns
# Rebuild missing lookup keys as State + District, the same concatenation
# the populated rows already show (e.g. "Andhra PradeshAdilabad").
# assign() returns a new frame, which avoids the SettingWithCopyWarning that
# an in-place column assignment raises on a filtered frame.
df = df.assign(vlookup=df['vlookup'].fillna(df['State'] + df['District']))
df
<ipython-input-9-3de84ecf340c>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df['vlookup'] = df['vlookup'].fillna(df['State']+df['District'])
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Andhra Pradesh | Adilabad | 1901.0 | 6.725 | 10.488 | 23.288 | 35.560 | 23.119 | 115.546 | 294.119 | 276.865 | 181.615 | 47.310 | 1.339 | 0.000 | Andhra PradeshAdilabad |
| 3 | Andhra Pradesh | Adilabad | 1902.0 | 0.420 | 0.000 | 0.388 | 6.070 | 3.331 | 45.960 | 233.973 | 167.971 | 198.177 | 26.447 | 35.083 | 11.222 | Andhra PradeshAdilabad |
| 4 | Andhra Pradesh | Adilabad | 1903.0 | 6.643 | 1.956 | 0.173 | 4.551 | 33.348 | 132.078 | 436.611 | 334.544 | 226.037 | 138.818 | 14.095 | 8.823 | Andhra PradeshAdilabad |
| 5 | Andhra Pradesh | Adilabad | 1904.0 | 0.054 | 0.121 | 11.446 | 0.017 | 16.900 | 131.048 | 160.694 | 81.865 | 251.577 | 110.391 | 0.146 | 0.130 | Andhra PradeshAdilabad |
| 6 | Andhra Pradesh | Adilabad | 1905.0 | 0.589 | 2.293 | 8.252 | 35.020 | 17.569 | 79.937 | 96.331 | 313.522 | 361.697 | 4.950 | 0.146 | 0.000 | Andhra PradeshAdilabad |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55312 | West Bengal | North Dinajpur | 1998.0 | 3.676 | 16.042 | 15.435 | 43.497 | 189.031 | 101.695 | 275.914 | 316.537 | 262.286 | 256.652 | 15.486 | 0.000 | West BengalNorth Dinajpur |
| 55313 | West Bengal | North Dinajpur | 1999.0 | 7.867 | 2.932 | 2.066 | 3.020 | 151.115 | 214.111 | 380.077 | 375.183 | 232.015 | 85.839 | 7.591 | 0.255 | West BengalNorth Dinajpur |
| 55314 | West Bengal | North Dinajpur | 2000.0 | 11.294 | 10.908 | 10.686 | 27.669 | 110.618 | 264.845 | 185.798 | 297.535 | 280.883 | 22.048 | 0.568 | 0.423 | West BengalNorth Dinajpur |
| 55315 | West Bengal | North Dinajpur | 2001.0 | 1.866 | 4.048 | 21.805 | 36.436 | 152.242 | 164.361 | 311.196 | 271.373 | 165.015 | 124.258 | 2.798 | 0.000 | West BengalNorth Dinajpur |
| 55316 | West Bengal | North Dinajpur | 2002.0 | 14.939 | 3.758 | 12.410 | 54.591 | 80.993 | 189.604 | 276.109 | 285.924 | 215.591 | 108.733 | 17.757 | 0.000 | West BengalNorth Dinajpur |
55284 rows × 16 columns
# Re-count the missing entries per column once the cleaning steps have run.
print("Null values after processing:")
missing_after = df.isna().sum()
missing_after
Null values after processing:
State 0 District 0 Year 0 Jan 0 Feb 0 Mar 0 Apr 0 May 0 Jun 0 Jul 0 Aug 0 Sep 0 Oct 0 Nov 0 Dec 0 vlookup 0 dtype: int64
# Pairwise covariance of the numeric columns (Year and the monthly values).
# The text columns (State, District, vlookup) are excluded explicitly rather
# than relying on pandas to drop them silently, which newer pandas releases
# no longer do.
print("Co-Variance Matrix")
df.select_dtypes(include="number").cov()
Co-Variance Matrix
| Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Year | 866.932348 | 3.472550 | -30.216839 | 19.364439 | -10.206869 | 14.997325 | -146.250203 | -94.709961 | -46.587178 | -45.853489 | 72.554484 | -22.358163 | 23.882629 |
| Jan | 3.472550 | 335.401558 | 102.801691 | 104.282866 | 78.556940 | 7.485957 | -144.002417 | -130.411822 | 124.228681 | 4.702196 | -49.933445 | 14.990472 | 54.653387 |
| Feb | -30.216839 | 102.801691 | 494.028537 | 276.304620 | 426.575517 | 668.825260 | 840.584787 | 504.102901 | 658.703433 | 362.480899 | 230.412530 | -10.941797 | 23.717749 |
| Mar | 19.364439 | 104.282866 | 276.304620 | 1187.148176 | 1332.556364 | 2362.848442 | 3081.834649 | 2075.056854 | 1771.536503 | 1181.040630 | 835.525463 | 64.004500 | 39.115240 |
| Apr | -10.206869 | 78.556940 | 426.575517 | 1332.556364 | 4838.730654 | 6570.511426 | 8906.494022 | 5786.318829 | 3866.553586 | 3250.033571 | 2935.008564 | 664.510032 | 115.421430 |
| May | 14.997325 | 7.485957 | 668.825260 | 2362.848442 | 6570.511426 | 16950.075713 | 18660.285660 | 12565.582830 | 9052.677761 | 7052.653334 | 5957.579351 | 1045.412087 | 140.574974 |
| Jun | -146.250203 | -144.002417 | 840.584787 | 3081.834649 | 8906.494022 | 18660.285660 | 40819.594550 | 26031.017208 | 18606.913694 | 12937.462090 | 8566.109179 | 931.692439 | -327.454205 |
| Jul | -94.709961 | -130.411822 | 504.102901 | 2075.056854 | 5786.318829 | 12565.582830 | 26031.017208 | 41584.688033 | 20669.012821 | 13780.762233 | 4777.533129 | -1036.934627 | -838.736478 |
| Aug | -46.587178 | 124.228681 | 658.703433 | 1771.536503 | 3866.553586 | 9052.677761 | 18606.913694 | 20669.012821 | 27412.086234 | 10298.077031 | 3085.359300 | -1039.512149 | -686.295322 |
| Sep | -45.853489 | 4.702196 | 362.480899 | 1181.040630 | 3250.033571 | 7052.653334 | 12937.462090 | 13780.762233 | 10298.077031 | 16044.794407 | 3695.459896 | -65.476695 | -261.389108 |
| Oct | 72.554484 | -49.933445 | 230.412530 | 835.525463 | 2935.008564 | 5957.579351 | 8566.109179 | 4777.533129 | 3085.359300 | 3695.459896 | 8710.970814 | 2173.239251 | 595.658085 |
| Nov | -22.358163 | 14.990472 | -10.941797 | 64.004500 | 664.510032 | 1045.412087 | 931.692439 | -1036.934627 | -1039.512149 | -65.476695 | 2173.239251 | 3390.425621 | 800.637200 |
| Dec | 23.882629 | 54.653387 | 23.717749 | 39.115240 | 115.421430 | 140.574974 | -327.454205 | -838.736478 | -686.295322 | -261.389108 | 595.658085 | 800.637200 | 954.038586 |
# Pairwise correlation of the numeric columns (Year and the monthly values).
# The text columns (State, District, vlookup) are excluded explicitly rather
# than relying on pandas to drop them silently, which newer pandas releases
# no longer do.
print("Correlation Matrix")
df.select_dtypes(include="number").corr()
Correlation Matrix
| Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Year | 1.000000 | 0.006440 | -0.046172 | 0.019088 | -0.004983 | 0.003912 | -0.024585 | -0.015774 | -0.009557 | -0.012295 | 0.026402 | -0.013041 | 0.026261 |
| Jan | 0.006440 | 1.000000 | 0.252547 | 0.165264 | 0.061665 | 0.003140 | -0.038918 | -0.034919 | 0.040970 | 0.002027 | -0.029213 | 0.014057 | 0.096617 |
| Feb | -0.046172 | 0.252547 | 1.000000 | 0.360794 | 0.275902 | 0.231127 | 0.187185 | 0.111218 | 0.178996 | 0.128748 | 0.111070 | -0.008454 | 0.034547 |
| Mar | 0.019088 | 0.165264 | 0.360794 | 1.000000 | 0.555991 | 0.526742 | 0.442713 | 0.295332 | 0.310546 | 0.270611 | 0.259821 | 0.031903 | 0.036755 |
| Apr | -0.004983 | 0.061665 | 0.275902 | 0.555991 | 1.000000 | 0.725517 | 0.633734 | 0.407915 | 0.335728 | 0.368854 | 0.452075 | 0.164062 | 0.053720 |
| May | 0.003912 | 0.003140 | 0.231127 | 0.526742 | 0.725517 | 1.000000 | 0.709411 | 0.473293 | 0.419972 | 0.427661 | 0.490287 | 0.137903 | 0.034957 |
| Jun | -0.024585 | -0.038918 | 0.187185 | 0.442713 | 0.633734 | 0.709411 | 1.000000 | 0.631815 | 0.556249 | 0.505531 | 0.454272 | 0.079197 | -0.052473 |
| Jul | -0.015774 | -0.034919 | 0.111218 | 0.295332 | 0.407915 | 0.473293 | 0.631815 | 1.000000 | 0.612184 | 0.533506 | 0.251017 | -0.087329 | -0.133161 |
| Aug | -0.009557 | 0.040970 | 0.178996 | 0.310546 | 0.335728 | 0.419972 | 0.556249 | 0.612184 | 1.000000 | 0.491041 | 0.199665 | -0.107828 | -0.134201 |
| Sep | -0.012295 | 0.002027 | 0.128748 | 0.270611 | 0.368854 | 0.427661 | 0.505531 | 0.533506 | 0.491041 | 1.000000 | 0.312585 | -0.008878 | -0.066809 |
| Oct | 0.026402 | -0.029213 | 0.111070 | 0.259821 | 0.452075 | 0.490287 | 0.454272 | 0.251017 | 0.199665 | 0.312585 | 1.000000 | 0.399896 | 0.206624 |
| Nov | -0.013041 | 0.014057 | -0.008454 | 0.031903 | 0.164062 | 0.137903 | 0.079197 | -0.087329 | -0.107828 | -0.008878 | 0.399896 | 1.000000 | 0.445170 |
| Dec | 0.026261 | 0.096617 | 0.034547 | 0.036755 | 0.053720 | 0.034957 | -0.052473 | -0.133161 | -0.134201 | -0.066809 | 0.206624 | 0.445170 | 1.000000 |
# Average the twelve monthly values of each record into `mean_rainfall`.
# The months are selected by name instead of by position (3:15), so the
# result stays correct if columns are added or reordered. assign() returns a
# new frame and therefore does not raise SettingWithCopyWarning.
month_columns = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
df = df.assign(mean_rainfall=df[month_columns].mean(axis=1))
df
<ipython-input-13-f873d37c5629>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df['mean_rainfall']=df.iloc[:,3:15].mean(axis=1)
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | mean_rainfall | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Andhra Pradesh | Adilabad | 1901.0 | 6.725 | 10.488 | 23.288 | 35.560 | 23.119 | 115.546 | 294.119 | 276.865 | 181.615 | 47.310 | 1.339 | 0.000 | Andhra PradeshAdilabad | 84.664500 |
| 3 | Andhra Pradesh | Adilabad | 1902.0 | 0.420 | 0.000 | 0.388 | 6.070 | 3.331 | 45.960 | 233.973 | 167.971 | 198.177 | 26.447 | 35.083 | 11.222 | Andhra PradeshAdilabad | 60.753500 |
| 4 | Andhra Pradesh | Adilabad | 1903.0 | 6.643 | 1.956 | 0.173 | 4.551 | 33.348 | 132.078 | 436.611 | 334.544 | 226.037 | 138.818 | 14.095 | 8.823 | Andhra PradeshAdilabad | 111.473083 |
| 5 | Andhra Pradesh | Adilabad | 1904.0 | 0.054 | 0.121 | 11.446 | 0.017 | 16.900 | 131.048 | 160.694 | 81.865 | 251.577 | 110.391 | 0.146 | 0.130 | Andhra PradeshAdilabad | 63.699083 |
| 6 | Andhra Pradesh | Adilabad | 1905.0 | 0.589 | 2.293 | 8.252 | 35.020 | 17.569 | 79.937 | 96.331 | 313.522 | 361.697 | 4.950 | 0.146 | 0.000 | Andhra PradeshAdilabad | 76.692167 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55312 | West Bengal | North Dinajpur | 1998.0 | 3.676 | 16.042 | 15.435 | 43.497 | 189.031 | 101.695 | 275.914 | 316.537 | 262.286 | 256.652 | 15.486 | 0.000 | West BengalNorth Dinajpur | 124.687583 |
| 55313 | West Bengal | North Dinajpur | 1999.0 | 7.867 | 2.932 | 2.066 | 3.020 | 151.115 | 214.111 | 380.077 | 375.183 | 232.015 | 85.839 | 7.591 | 0.255 | West BengalNorth Dinajpur | 121.839250 |
| 55314 | West Bengal | North Dinajpur | 2000.0 | 11.294 | 10.908 | 10.686 | 27.669 | 110.618 | 264.845 | 185.798 | 297.535 | 280.883 | 22.048 | 0.568 | 0.423 | West BengalNorth Dinajpur | 101.939583 |
| 55315 | West Bengal | North Dinajpur | 2001.0 | 1.866 | 4.048 | 21.805 | 36.436 | 152.242 | 164.361 | 311.196 | 271.373 | 165.015 | 124.258 | 2.798 | 0.000 | West BengalNorth Dinajpur | 104.616500 |
| 55316 | West Bengal | North Dinajpur | 2002.0 | 14.939 | 3.758 | 12.410 | 54.591 | 80.993 | 189.604 | 276.109 | 285.924 | 215.591 | 108.733 | 17.757 | 0.000 | West BengalNorth Dinajpur | 105.034083 |
55284 rows × 17 columns
import matplotlib.pyplot as plt

# Per-state average of every numeric column. numeric_only=True keeps the
# text columns (District, vlookup) out of the aggregation, which would
# otherwise fail on pandas versions that do not drop them automatically.
a = df.groupby('State').mean(numeric_only=True)

# Line plot of the per-state mean rainfall; state names are rotated so the
# x tick labels do not overlap.
plt.figure(figsize=(16, 6), dpi=80)
plt.xticks(rotation=90)
plt.plot(a['mean_rainfall'], label='mean_rainfall')
plt.legend(loc='best')
plt.title("Mean rainfall by State")
Text(0.5, 1.0, 'Mean rainfall by State')
# Seasonal rainfall totals per record. Each total sums the listed month
# columns of the same row, so 'Dec-Feb' combines Dec, Jan and Feb of one
# calendar year.
season_months = {
    'Dec-Feb': ['Dec', 'Jan', 'Feb'],
    'Mar-Jun': ['Mar', 'Apr', 'May', 'Jun'],
    'Jul-Nov': ['Jul', 'Aug', 'Sep', 'Oct', 'Nov'],
}
# assign() returns a new frame, which avoids the SettingWithCopyWarning that
# in-place column assignment raises on a filtered frame. The keyword
# expansion is needed because the column names contain hyphens.
df = df.assign(**{
    season: df[months].sum(axis=1)
    for season, months in season_months.items()
})
df
<ipython-input-15-bf2e370200f1>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df['Dec-Feb']=df[['Dec','Jan','Feb']].sum(axis=1) <ipython-input-15-bf2e370200f1>:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df['Mar-Jun']=df[['Mar','Apr','May','Jun']].sum(axis=1) <ipython-input-15-bf2e370200f1>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df['Jul-Nov']=df[['Jul','Aug','Sep','Oct','Nov']].sum(axis=1)
| State | District | Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | vlookup | mean_rainfall | Dec-Feb | Mar-Jun | Jul-Nov | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | Andhra Pradesh | Adilabad | 1901.0 | 6.725 | 10.488 | 23.288 | 35.560 | 23.119 | 115.546 | 294.119 | 276.865 | 181.615 | 47.310 | 1.339 | 0.000 | Andhra PradeshAdilabad | 84.664500 | 17.213 | 197.513 | 801.248 |
| 3 | Andhra Pradesh | Adilabad | 1902.0 | 0.420 | 0.000 | 0.388 | 6.070 | 3.331 | 45.960 | 233.973 | 167.971 | 198.177 | 26.447 | 35.083 | 11.222 | Andhra PradeshAdilabad | 60.753500 | 11.642 | 55.749 | 661.651 |
| 4 | Andhra Pradesh | Adilabad | 1903.0 | 6.643 | 1.956 | 0.173 | 4.551 | 33.348 | 132.078 | 436.611 | 334.544 | 226.037 | 138.818 | 14.095 | 8.823 | Andhra PradeshAdilabad | 111.473083 | 17.422 | 170.150 | 1150.105 |
| 5 | Andhra Pradesh | Adilabad | 1904.0 | 0.054 | 0.121 | 11.446 | 0.017 | 16.900 | 131.048 | 160.694 | 81.865 | 251.577 | 110.391 | 0.146 | 0.130 | Andhra PradeshAdilabad | 63.699083 | 0.305 | 159.411 | 604.673 |
| 6 | Andhra Pradesh | Adilabad | 1905.0 | 0.589 | 2.293 | 8.252 | 35.020 | 17.569 | 79.937 | 96.331 | 313.522 | 361.697 | 4.950 | 0.146 | 0.000 | Andhra PradeshAdilabad | 76.692167 | 2.882 | 140.778 | 776.646 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55312 | West Bengal | North Dinajpur | 1998.0 | 3.676 | 16.042 | 15.435 | 43.497 | 189.031 | 101.695 | 275.914 | 316.537 | 262.286 | 256.652 | 15.486 | 0.000 | West BengalNorth Dinajpur | 124.687583 | 19.718 | 349.658 | 1126.875 |
| 55313 | West Bengal | North Dinajpur | 1999.0 | 7.867 | 2.932 | 2.066 | 3.020 | 151.115 | 214.111 | 380.077 | 375.183 | 232.015 | 85.839 | 7.591 | 0.255 | West BengalNorth Dinajpur | 121.839250 | 11.054 | 370.312 | 1080.705 |
| 55314 | West Bengal | North Dinajpur | 2000.0 | 11.294 | 10.908 | 10.686 | 27.669 | 110.618 | 264.845 | 185.798 | 297.535 | 280.883 | 22.048 | 0.568 | 0.423 | West BengalNorth Dinajpur | 101.939583 | 22.625 | 413.818 | 786.832 |
| 55315 | West Bengal | North Dinajpur | 2001.0 | 1.866 | 4.048 | 21.805 | 36.436 | 152.242 | 164.361 | 311.196 | 271.373 | 165.015 | 124.258 | 2.798 | 0.000 | West BengalNorth Dinajpur | 104.616500 | 5.914 | 374.844 | 874.640 |
| 55316 | West Bengal | North Dinajpur | 2002.0 | 14.939 | 3.758 | 12.410 | 54.591 | 80.993 | 189.604 | 276.109 | 285.924 | 215.591 | 108.733 | 17.757 | 0.000 | West BengalNorth Dinajpur | 105.034083 | 18.697 | 337.598 | 904.114 |
55284 rows × 20 columns
# One line per season showing the per-state average of the seasonal totals.
plt.figure(figsize=(16, 6), dpi=80)
plt.xticks(rotation=90)

# numeric_only=True keeps the text columns (District, vlookup) out of the
# aggregation, which would otherwise fail on pandas versions that do not
# drop them automatically.
a = df.groupby('State').mean(numeric_only=True)
for season in ('Dec-Feb', 'Mar-Jun', 'Jul-Nov'):
    plt.plot(a[season], label=season)

plt.legend(loc='best')
plt.title("Seasonal variation in rainfall for different states")
Text(0.5, 1.0, 'Seasonal variation in rainfall for different states')
# Stacked bar chart of the seasonal totals summed per state.
# Grouping by column name and selecting only the numeric season columns
# keeps the 'State' text column out of the sum; the original passed it both
# as the grouping key and as a column to aggregate.
season_columns = ['Dec-Feb', 'Mar-Jun', 'Jul-Nov']
bplot = df.groupby('State')[season_columns].sum().plot.bar(stacked=True, figsize=(20, 12))
print("Stacked Bar Graph for Rainfall in Different States")
Stacked Bar Graph for Rainfall in Different States
# Seasonal totals summed per district. Only the numeric season columns are
# aggregated, so no text column (the original also selected 'State') is fed
# into sum().
df.groupby('District')[['Dec-Feb', 'Mar-Jun', 'Jul-Nov']].sum()
| Dec-Feb | Mar-Jun | Jul-Nov | |
|---|---|---|---|
| District | |||
| Adilabad | 1855.857 | 19259.712 | 80664.710 |
| Agra | 2206.319 | 7091.313 | 67288.460 |
| Ahmadabad | 263.360 | 9924.743 | 51085.914 |
| Ahmadnagar | 803.282 | 20929.584 | 64834.537 |
| Aizwal | 4907.794 | 116523.837 | 156655.649 |
| ... | ... | ... | ... |
| Wokha | 6033.575 | 95579.918 | 114503.666 |
| Yamunanagar | 5505.292 | 11308.169 | 54242.271 |
| Yanam | 4348.600 | 22870.200 | 93319.200 |
| Yavatmal | 2321.037 | 18040.780 | 78974.286 |
| Zunheboto | 5539.647 | 85844.541 | 116013.998 |
540 rows × 3 columns
import seaborn as sns

# Box plot of mean_rainfall for every district. The figure is made very wide
# so that each district gets its own readable tick label.
fig = plt.figure(figsize=(300, 20))
plt.xticks(rotation='vertical')
sns.boxplot(x='District', y='mean_rainfall', data=df)
# The x axis is District, so the title names districts rather than states.
plt.title("Mean rainfall for each District")
# NOTE: the former `fig.update_layout(...)` call was removed. update_layout
# is a Plotly figure method; `fig` here is a matplotlib Figure, so the call
# raised AttributeError.
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-19-32ac5e3504cf> in <module> 6 plt.title("Mean rainfall for each State") 7 ----> 8 fig.update_layout(xaxis=dict(rangeslider=dict(visible=True),type="linear")) AttributeError: 'Figure' object has no attribute 'update_layout'
import plotly.express as px

# Closed polar line chart: radius is mean_rainfall, angle is State, and one
# trace colour is used per Year, drawn on the dark Plotly template.
reversed_plasma = px.colors.sequential.Plasma_r
fig = px.line_polar(
    df,
    r="mean_rainfall",
    theta="State",
    color='Year',
    line_close=True,
    color_discrete_sequence=reversed_plasma,
    template="plotly_dark",
)
fig.show()
# Polar scatter: radius, marker size and marker colour all encode
# mean_rainfall, the angle is State and the marker symbol varies by Year.
# `color` refers to a numeric column, so Plotly Express builds a continuous
# colour scale; the palette therefore has to be passed as
# color_continuous_scale (color_discrete_sequence only applies to
# non-numeric colour columns and was being ignored).
fig = px.scatter_polar(
    df,
    r="mean_rainfall",
    theta="State",
    color="mean_rainfall",
    symbol="Year",
    size="mean_rainfall",
    color_continuous_scale=px.colors.sequential.Plasma_r,
)
fig.show()
# Animated bubble chart with one frame per Year: bubbles are sized by
# mean_rainfall, coloured by State and matched between frames by State.
year_axis_range = [1899, 2004]
rainfall_axis_range = [0, 300]
px.scatter(
    df,
    x="Year",
    y="mean_rainfall",
    animation_frame="Year",
    animation_group="State",
    size="mean_rainfall",
    color="State",
    hover_name="State",
    title='Mean Rainfall of each State from years 1900 to 2002',
    log_x=True,
    size_max=50,
    range_x=year_axis_range,
    range_y=rainfall_axis_range,
)
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
# Time series of mean_rainfall against Year, one line per district.
districts = df['District'].unique()

# Fixed seed so every run assigns the same colour to the same district;
# replace=False gives each district a distinct XKCD colour.
np.random.seed(100)
mycolors = np.random.choice(list(mpl.colors.XKCD_COLORS.keys()), len(districts), replace=False)

plt.figure(figsize=(16, 12), dpi=80)
# Every district is drawn. The previous `if i > 0` guard silently left the
# first district out of a plot whose title says "all cities".
for i, y in enumerate(districts):
    # Filter once per district and reuse the result for the line and label.
    series = df.loc[df.District == y, ['Year', 'mean_rainfall']]
    plt.plot('Year', 'mean_rainfall', data=series, color=mycolors[i], label=y)
    # Label the line slightly to the right of its last data point.
    plt.text(series['Year'].iloc[-1] + 3, series['mean_rainfall'].iloc[-1], y, fontsize=12, color=mycolors[i])

plt.gca().set(xlim=(1900, 2003), ylim=(0, 1000), ylabel='$Mean Rainfall$', xlabel='$Year$')
plt.yticks(fontsize=12, alpha=.7)
plt.title("Time Series of Rainfall Data from 1900 to 2002 for all cities")
plt.show()
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
# Time series of mean_rainfall against Year for the first few districts only.
districts = df['District'].unique()

# Fixed seed so every run assigns the same colour to the same district;
# replace=False gives each district a distinct XKCD colour.
np.random.seed(2)
mycolors = np.random.choice(list(mpl.colors.XKCD_COLORS.keys()), len(districts), replace=False)

# Districts actually drawn; the title is derived from this selection so the
# stated count can no longer disagree with the plot (it used to say 10
# while only 5 were plotted).
shown_districts = districts[:5]

plt.figure(figsize=(16, 12), dpi=80)
for i, y in enumerate(shown_districts):
    # Filter once per district and reuse the result for the line and label.
    series = df.loc[df.District == y, ['Year', 'mean_rainfall']]
    plt.plot('Year', 'mean_rainfall', data=series, color=mycolors[i], label=y)
    # Label the line slightly to the right of its last data point.
    plt.text(series['Year'].iloc[-1] + 3, series['mean_rainfall'].iloc[-1], y, fontsize=12, color=mycolors[i])

plt.gca().set(xlim=(1900, 2003), ylim=(0, 150), ylabel='$Mean Rainfall$', xlabel='$Year$')
plt.yticks(fontsize=12, alpha=.7)
plt.title(f"Time Series of Rainfall Data from 1900 to 2002 for {len(shown_districts)} cities")
plt.show()